/* ==================================================================
File        : deferredFilterShadows.hlsl
Author      : Vassilis Poulopoulos
Date        : 18/09/2008 15:13:46
Format      : HLSL/Cg
Description : Deferred shadow mapping implementation

================================================================== */
 
#include "DeferredFilterCommon.hlsl"

//------------------------------------------------------------------------------------------------
// Shader constants
//------------------------------------------------------------------------------------------------

// Per-light shadow constants shared by every shadow map technique in this file.
// Read through GetCommon().
struct ShadowCommon
{
	float4			shadowFactors;			// .x	Overall shadow intensity factor
											// .y	Shadow view space falloff start
											// .z	1.0 / (falloff end - start)
											// .w	z bias (the code applies it as a multiplier on the shadow space z)
	float4			lightDirPos;			// The light's position or direction in view space
	float4			params;					// .x = soft shadow sample count
											// .y = half sample count (kernel radius)
											// .z = shadow noise intensity
											// .w = <unused>
};

// Constants for parallel split shadow maps that use an orthographic projection.
// Every float4 below holds one value per split (.x = split 0 ... .w = split 3); the value for the
// active split is selected with a dot product against a per-split mask in ProjectToShadowMapParallel().
// NOTE(review): 'rm' is not defined in this file - presumably a row-major qualifier macro from
// DeferredFilterCommon.hlsl; confirm.
struct ShadowParallelOrtho
{
	float4			ZRangeMin;				// Used for parallel split. Contains the min z of up to 4 frustum splits.
	float4			ZRangeMax;				// Contains the max z of up to 4 frustum splits.

	rm float4x4		eye2shadowMapUVMtx;		// Transform matrix for *first* split
	float4			OrthoShiftU;			// Contain the shift/scale of split N against split 0.
	float4			OrthoShiftV;			// Used only for parallel splits using orthographic
	float4			OrthoShiftZ;			// projection (either fixed or tracking).
	float4			OrthoScaleU;			// Applied as: coords * scale + shift
	float4			OrthoScaleV;			
	float4			OrthoScaleZ;
	float4			softShadowBlockerRadius;// For each split
	float4			softShadowSpreadRatio;	// For each split
	float4			noiseScale;				// For each split
};

// Constants for parallel split shadow maps that use LISPSM. Unlike the orthographic variant,
// each split carries its own full transform matrix (see ProjectToShadowMapLISPSM()).
struct ShadowParallelLISPSM
{
	float4			ZRangeMin;				// Used for parallel split. Contains the min z of up to 4 frustum splits.
	float4			ZRangeMax;				// Contains the max z of up to 4 frustum splits.

	rm float4x4		eye2shadowMapUVMtx0;	// Transform matrix for *each* split
	rm float4x4		eye2shadowMapUVMtx1;
	rm float4x4		eye2shadowMapUVMtx2;
	rm float4x4		eye2shadowMapUVMtx3;
};

// Constants for cascaded shadow maps. Every float4 holds one value per cascade
// (.x = cascade 0 ... .w = cascade 3), selected with a dot product against a per-cascade mask
// in ProjectToShadowMapCascade().
struct ShadowCascade
{
	rm float4x4		eye2lightProjMtx;		// Transform matrix for *first* cascade.

	float4			MinRadiusSq;			// Contains the min/max radius for each cascade, assuming the first
	float4			MaxRadiusSq;			// cascade occupies [-1,1] and the rest are proportionally bigger.
	float4			CascadeUVScale;			// Contains the scale factors to apply to go from UVs calculated for the 1st cascade
											// to UVs for the Nth cascade.
	float4			CascadeZScale;			// Z scale & bias of cascade N against the 1st one. Each
	float4			CascadeZShift;			// cascade has a different near/far Z.

	float4			softShadowBlockerRadius;// For each cascade
	float4			softShadowSpreadRatio;	// For each cascade
	float4			noiseScale;				// For each cascade
};

// Constants for grid shadow maps (see ProjectToShadowMapGrid()).
struct ShadowGrid
{
	rm float4x4		eye2lightProjMtx;		// Transform matrix.
	float4			softShadowParams;		// .x = PCSS blocker search radius
											// .y = PCSS spread ratio
											// .z = noise scale
											// .w = <unused>
};

// Constants for point & spot light shadow maps (see ProjectToShadowMapSpotPoint() and
// CalculateLightAttenuation()).
struct ShadowPointSpot
{
	rm float4x4		eye2shadowMapUVMtx;		// The transform matrix
	float4			params;					// .x	Inner radius
											// .y	1.0 / (outer - inner radius)
											// .z	Spot near plane
											// .w	not read by the code visible in this file
};

// Constants for dual paraboloid shadow maps (see ProjectToShadowMapDualParaboloid()).
struct ShadowDualParaboloid
{
	rm float4x4		viewMtx;				// The transform matrix
	float4			params;					// .x	Inner radius
											// .y	1.0 / (outer - inner radius)
											// .z	Near Z
											// .w	1.0 / (Far Z - Near Z)
											// .zw map the distance from the light to the stored shadow z:
											// (distance - .z) * .w
};

// Constants for cube map shadow maps (see ProjectToShadowMapCube() and LookupShadowmapCube()).
struct ShadowCube
{
	rm float4x4		viewMtx;				// Matrix to transform world position into light space
											// NOTE(review): ProjectToShadowMapCube() applies it to the *view space* position - confirm which space is intended.
	float4			projParams;				// .xy = zNear, (zFar - zNear) / zFar, .z = f/(f-n), .w = -nf/(f-n)
											// .xy are used to linearize the depth read from the cube map:
											// linearZ = .x / (1.0 - depth * .y)
											// NOTE(review): the original comment read ".w = -nf(f-n)"; the division is presumed - confirm.
	float4			params;					// .x	Inner radius
											// .y	1.0 / (outer - inner radius)
											// .z	Outer radius
											// .w	<unused>
};

// Constants for the first light. ShadowCommon is three float4s, so it occupies c150..c152.
ShadowCommon			g_ShadowCommon		: register(c150);

// All technique specific constant sets are bound to the same start register (c153).
// Presumably only the set matching the compiled technique (PARALLEL, CASCADE, GRID, ...) is
// referenced by a given shader permutation, so the overlap is intentional - confirm against the
// code that uploads the constants.
ShadowParallelOrtho		g_Parallel			: register(c153);
ShadowParallelLISPSM	g_LISPSM			: register(c153);
ShadowCascade			g_Cascade			: register(c153);
ShadowGrid				g_Grid				: register(c153);
ShadowPointSpot			g_SpotPoint			: register(c153);
ShadowDualParaboloid	g_DualParaboloid	: register(c153);
ShadowCube				g_Cube				: register(c153);


// Used for 2nd forward light
// Same layout as above, starting at c197 (common) and c200 (technique specific).
// These are returned by the Get*() accessors when g_LightIndex != 0.
ShadowCommon			g_ShadowCommon1		: register(c197);

ShadowParallelOrtho		g_Parallel1			: register(c200);
ShadowParallelLISPSM	g_LISPSM1			: register(c200);
ShadowCascade			g_Cascade1			: register(c200);
ShadowGrid				g_Grid1				: register(c200);
ShadowPointSpot			g_SpotPoint1		: register(c200);
ShadowDualParaboloid	g_DualParaboloid1	: register(c200);
ShadowCube				g_Cube1				: register(c200);

// Soft shadow (PCSS) settings resolved for the current fragment.
// Filled in by the ProjectToShadowMap*() functions and consumed by LookupShadowmap().
// Techniques without soft shadow support write 0 / 0 / 1 / 1 / 0 / 0.
struct SoftShadowParams
{
	float	blockerSearchRadius;	// in pixels
	float	spreadRatio;			// in pixels / unit length
	float	sampleCount;
	float	halfSampleCount;		// kernel radius; LookupShadowmap() loops over [-half, +half] in both axes
	float	noiseScale;				// scales the UVs used to look up the noise texture
	float	noiseIntensity;			// scales the rotation angle applied to the PCF kernel
};

// Shadow map size in texels. Used by LookupShadowmap() to convert texel offsets to UV offsets
// (1.0 / g_ShadowMapSize) and to compute bilinear weights.
// NOTE(review): the lookup code scales V by 0.25 (or multiplies by 4 for the weights), which suggests
// this is the width and that the texture is four times as tall - confirm.
#if defined(_360_TARGET)
	static const float g_ShadowMapSize = 960.0;
#else
	static const float g_ShadowMapSize = 1024.0;
#endif

// Table of 35 2D sample offsets with components in [-1, 1].
// Only referenced by the experimental (currently disabled) non-regular-grid PCF loop in
// LookupShadowmap(), where each entry is multiplied by the filter radius.
static const float2 g_PoissonDisk[35] = 
{
	float2( -0.59503841, -0.33438265 ), 
	float2( -0.76340139, -0.05366653 ), 
	float2( -0.00587583, +0.23720980 ), 
	float2( +0.26433408, +0.91585755 ), 
	float2( +0.28839147, +0.17569196 ), 
	float2( +0.10122347, +0.03284788 ), 
	float2( +0.99747407, -0.51923263 ), 
	float2( -0.50814432, +0.62881708 ), 
	float2( -0.28404582, +0.03443611 ), 
	float2( +0.21612251, -0.92605925 ), 
	float2( +0.41555202, +0.25930381 ), 
	float2( +0.78880525, +0.90941250 ), 
	float2( -0.69868302, -0.53556216 ), 
	float2( -0.38935751, +0.62870920 ), 
	float2( +0.92552221, +0.85720789 ), 
	float2( -0.37237096, -0.58814049 ), 
	float2( -0.74326515, -0.02866143 ), 
	float2( +0.31063271, -0.57653165 ), 
	float2( -0.19292885, +0.49412012 ), 
	float2( -0.52338910, +0.98214293 ), 
	float2( +0.47023356, -0.49241942 ), 
	float2( +0.25820756, +0.82683027 ), 
	float2( +0.83283424, -0.56223643 ), 
	float2( +0.96799815, +0.64146268 ), 
	float2( -0.76140279, +0.02654517 ), 
	float2( -0.84870666, -0.76545179 ), 
	float2( -0.55759859, -0.75019056 ), 
	float2( +0.17851007, +0.41095674 ), 
	float2( -0.06213856, -0.03706419 ), 
	float2( -0.73886478, +0.44989455 ), 
	float2( -0.47128052, -0.01989299 ), 
	float2( +0.03948128, +0.19380057 ), 
	float2( +0.79948747, -0.52947825 ), 
	float2( +0.97526872, +0.40444124 ), 
	float2( +0.85112631, +0.59042954 ), 
};

//------------------------------------------------------------------------------------------------
// Shader const dispatcher for multiple lights
//------------------------------------------------------------------------------------------------
// Selects which constant set the Get*() accessors return: 0 = first set (g_ShadowCommon,
// g_Parallel, ...), any other value = second set (g_ShadowCommon1, g_Parallel1, ...).
// No assignment is visible in this part of the file - presumably it is changed by the code that
// evaluates the 2nd forward light; confirm.
static int g_LightIndex = 0;

// Returns the common shadow constants of the light selected by g_LightIndex.
ShadowCommon GetCommon()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_ShadowCommon1;

	return g_ShadowCommon;
}

// Returns the orthographic parallel split constants of the light selected by g_LightIndex.
ShadowParallelOrtho GetParallel()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_Parallel1;

	return g_Parallel;
}

// Returns the LISPSM parallel split constants of the light selected by g_LightIndex.
ShadowParallelLISPSM GetLISPSM()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_LISPSM1;

	return g_LISPSM;
}

// Returns the cascade shadow map constants of the light selected by g_LightIndex.
ShadowCascade GetCascade()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_Cascade1;

	return g_Cascade;
}

// Returns the grid shadow map constants of the light selected by g_LightIndex.
ShadowGrid GetGrid()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_Grid1;

	return g_Grid;
}

// Returns the point/spot light shadow constants of the light selected by g_LightIndex.
ShadowPointSpot GetSpotPoint()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_SpotPoint1;

	return g_SpotPoint;
}

// Returns the dual paraboloid shadow constants of the light selected by g_LightIndex.
ShadowDualParaboloid GetDualParaboloid()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_DualParaboloid1;

	return g_DualParaboloid;
}

// Returns the cube shadow map constants of the light selected by g_LightIndex.
ShadowCube GetCube()
{
	// Any non-zero index maps to the second light's constants.
	if (g_LightIndex != 0)
		return g_Cube1;

	return g_Cube;
}

//------------------------------------------------------------------------------------------------
// Fwd declarations
//------------------------------------------------------------------------------------------------
// One projection function per shadow map technique. They are declared up front because the
// ProjectToShadowMap() dispatcher that calls them is defined before their bodies.
// All share the same contract: view space position in, shadow map coordinates out, plus the
// soft shadow settings for the fragment through outSoftShadowParams.
float4 ProjectToShadowMapParallel		(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);
float4 ProjectToShadowMapLISPSM			(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);
float4 ProjectToShadowMapCascade		(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);
float4 ProjectToShadowMapGrid			(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);
float4 ProjectToShadowMapSpotPoint		(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);
float4 ProjectToShadowMapDualParaboloid	(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);
float4 ProjectToShadowMapCube			(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams);

//------------------------------------------------------------------------------------------------
// Takes in a position in eye space and projects it to shadowmap UV coordinates.
// The technique is selected at compile time through the PARALLEL (+ optional LISPSM), CASCADE,
// GRID, SPOT_LIGHT / POINT_LIGHT, DUAL_PARABOLOID and CUBE defines, checked in that order.
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives the soft shadow settings for the fragment
//
// result.xy	shadowmap UV coords
// result.z		z in shadow map space. This is the z that needs to be compared against the shadow map depth value.
//
// If no technique is defined the result is 0 and outSoftShadowParams is set to the same neutral
// values used by the techniques without soft shadow support.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMap(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
#if defined(PARALLEL)
	#if defined(LISPSM)
		return ProjectToShadowMapLISPSM(viewSpaceCoords, outSoftShadowParams);
	#else
		return ProjectToShadowMapParallel(viewSpaceCoords, outSoftShadowParams);
	#endif
#elif defined(CASCADE)
	return ProjectToShadowMapCascade(viewSpaceCoords, outSoftShadowParams);
#elif defined(GRID)
	return ProjectToShadowMapGrid(viewSpaceCoords, outSoftShadowParams);
#elif defined(SPOT_LIGHT) || defined(POINT_LIGHT)
	return ProjectToShadowMapSpotPoint(viewSpaceCoords, outSoftShadowParams);
#elif defined(DUAL_PARABOLOID)
	return ProjectToShadowMapDualParaboloid(viewSpaceCoords, outSoftShadowParams);
#elif defined(CUBE)
	return ProjectToShadowMapCube(viewSpaceCoords, outSoftShadowParams);
#else
	// No technique selected. The 'out' parameter must still be written on this path, otherwise
	// the caller reads undefined values (and LookupShadowmap() would use them as loop bounds).
	outSoftShadowParams.blockerSearchRadius	= 0;
	outSoftShadowParams.spreadRatio			= 0;
	outSoftShadowParams.sampleCount			= 1;
	outSoftShadowParams.halfSampleCount		= 1;
	outSoftShadowParams.noiseScale			= 0;
	outSoftShadowParams.noiseIntensity		= 0;

	return 0;
#endif
}

//------------------------------------------------------------------------------------------------
// Implementation for parallel split shadow maps (orthographic projection).
//
// The position is transformed with the matrix of the first split and then re-mapped to the split
// that contains the fragment's view space z using that split's scale/shift.
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives the per-split soft shadow settings
// Returns float4(uv, z, 1); uv and z are forced to 0 when no split contains the fragment.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapParallel(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	ShadowParallelOrtho	splits = GetParallel();
	ShadowCommon		common = GetCommon();

	// Per-split mask: 1 for each split whose [min, max] z range contains the fragment, 0 otherwise
	float4 behindNear	= viewSpaceCoords.zzzz > splits.ZRangeMin;
	float4 beforeFar	= viewSpaceCoords.zzzz < splits.ZRangeMax;
	float4 splitMask	= behindNear * beforeFar;

	// Pick the scale/shift of the selected split
	float3 splitScale = float3(	dot(splitMask, splits.OrthoScaleU),
								dot(splitMask, splits.OrthoScaleV),
								dot(splitMask, splits.OrthoScaleZ) );

	float3 splitShift = float3(	dot(splitMask, splits.OrthoShiftU),
								dot(splitMask, splits.OrthoShiftV),
								dot(splitMask, splits.OrthoShiftZ) );

	// Project with the first split's matrix, then move into the selected split
	float3 uvz = mul(viewSpaceCoords, splits.eye2shadowMapUVMtx).xyz;
	uvz *= splitScale;
	uvz += splitShift;

	// Shadow acne fix - this has already been applied in OrthoScaleZ & OrthoShiftZ

	// Points outside the shadow range should not be in shadow.
	float insideAnySplit = (dot(splitMask, 1) > 0) ? 1 : 0;
	uvz *= insideAnySplit;

	// Per-split values are selected with the mask, the rest is common to the light
	outSoftShadowParams.blockerSearchRadius	= dot(splits.softShadowBlockerRadius, splitMask);
	outSoftShadowParams.spreadRatio			= dot(splits.softShadowSpreadRatio, splitMask);
	outSoftShadowParams.noiseScale			= dot(splits.noiseScale, splitMask);
	outSoftShadowParams.sampleCount			= common.params.x;
	outSoftShadowParams.halfSampleCount		= common.params.y;
	outSoftShadowParams.noiseIntensity		= common.params.z;

	return float4(uvz, 1);
}

//------------------------------------------------------------------------------------------------
// Implementation for parallel split LISPSM shadow maps.
//
// Every split has its own matrix; the result is the mask-weighted sum of the four projections.
// Soft shadows are not supported by this technique, so neutral soft shadow settings are written.
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives neutral soft shadow settings (0 / 0 / 1 / 1 / 0 / 0)
// Returns float4(uv, z, 1); uv and z are forced to 0 when no split contains the fragment.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapLISPSM(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	ShadowParallelLISPSM splits = GetLISPSM();

	// Per-split mask: 1 for each split whose [min, max] z range contains the fragment, 0 otherwise
	float4 behindNear	= viewSpaceCoords.zzzz > splits.ZRangeMin;
	float4 beforeFar	= viewSpaceCoords.zzzz < splits.ZRangeMax;
	float4 splitMask	= behindNear * beforeFar;

	// Accumulate the projection of every split, weighted by its mask entry
	float3 uvz = 0;
	uvz += (splitMask.x * mul(viewSpaceCoords, splits.eye2shadowMapUVMtx0)).xyz;
	uvz += (splitMask.y * mul(viewSpaceCoords, splits.eye2shadowMapUVMtx1)).xyz;
	uvz += (splitMask.z * mul(viewSpaceCoords, splits.eye2shadowMapUVMtx2)).xyz;
	uvz += (splitMask.w * mul(viewSpaceCoords, splits.eye2shadowMapUVMtx3)).xyz;

	// Shadow acne fix
	uvz.z *= GetCommon().shadowFactors.w;

	// Points outside the shadow range should not be in shadow.
	float insideAnySplit = (dot(splitMask, 1) > 0) ? 1 : 0;
	uvz *= insideAnySplit;

	// No soft shadows for this technique
	outSoftShadowParams.sampleCount			= 1;
	outSoftShadowParams.halfSampleCount		= 1;
	outSoftShadowParams.blockerSearchRadius	= 0;
	outSoftShadowParams.spreadRatio			= 0;
	outSoftShadowParams.noiseScale			= 0;
	outSoftShadowParams.noiseIntensity		= 0;

	return float4(uvz, 1);
}

//------------------------------------------------------------------------------------------------
// Implementation for cascade shadow maps.
//
// The fragment is projected into the space of the first cascade. Its extent from the centre
// selects the cascade, and the cascade's UV/Z scale and shift re-map the coordinates. Each cascade
// occupies a quarter of the V range (offsets 0, 0.25, 0.5, 0.75).
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives the per-cascade soft shadow settings
// Returns float4(uv, z, 1); uv and z are forced to 0 when no cascade contains the fragment.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapCascade(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	ShadowCascade	cascade	= GetCascade();
	ShadowCommon	common	= GetCommon();

	// Fragment position in the projection space of the first cascade
	float3 projPos = mul(viewSpaceCoords, cascade.eye2lightProjMtx).xyz;

	// The cascade is picked from the squared extent of the fragment in light projection space.
	// A true radius (x*x + y*y) can't be used: the occluder culling was done using 4 planes forming
	// a square inside which the circle is inscribed. For radial clipping any occluder inside the
	// square but outside the circle would also need to be rendered into the next shadow map, which
	// isn't currently done. So the larger of the two squared axis distances is used instead.
	float extentSq = max(projPos.x * projPos.x, projPos.y * projPos.y);

	// Per-cascade mask: 1 for each cascade whose [min, max] range contains the extent, 0 otherwise
	float4 aboveMin		= extentSq.xxxx > cascade.MinRadiusSq;
	float4 belowMax		= extentSq.xxxx < cascade.MaxRadiusSq;
	float4 cascadeMask	= aboveMin * belowMax;

	float cascadeUVScale	= dot(cascadeMask, cascade.CascadeUVScale);
	float cascadeZScale		= dot(cascadeMask, cascade.CascadeZScale);
	float cascadeZShift		= dot(cascadeMask, cascade.CascadeZShift);

	float3 uvz;

	// Go from [-1,1] space to UV coords (a quarter of the V range per cascade)
	uvz.xy = projPos.xy * cascadeUVScale;
	uvz.xy = uvz.xy * float2(0.5, -0.125) + float2(0.5, 0.125);

	// Move to the tile of the selected cascade
	uvz.y += dot(cascadeMask, float4(0, 0.25, 0.50, 0.75));

	uvz.z = projPos.z * cascadeZScale + cascadeZShift;

	// Shadow acne fix - this has already been applied in CascadeZScale & CascadeZShift

	// If we're outside the largest cascade force no shadow
	float insideAnyCascade = (dot(cascadeMask, 1) > 0) ? 1 : 0;
	uvz *= insideAnyCascade;

	// Per-cascade values are selected with the mask, the rest is common to the light
	outSoftShadowParams.blockerSearchRadius	= dot(cascade.softShadowBlockerRadius, cascadeMask);
	outSoftShadowParams.spreadRatio			= dot(cascade.softShadowSpreadRatio, cascadeMask);
	outSoftShadowParams.noiseScale			= dot(cascade.noiseScale, cascadeMask);
	outSoftShadowParams.sampleCount			= common.params.x;
	outSoftShadowParams.halfSampleCount		= common.params.y;
	outSoftShadowParams.noiseIntensity		= common.params.z;

	return float4(uvz, 1);
}

//------------------------------------------------------------------------------------------------
// Implementation for grid shadow maps.
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives the grid's soft shadow settings
// Returns the projected coordinates (all four components); they are forced to 0 when the
// projected V coordinate is above 0.25.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapGrid(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	ShadowGrid		grid	= GetGrid();
	ShadowCommon	common	= GetCommon();

	float4 uvzw = mul(viewSpaceCoords, grid.eye2lightProjMtx);

	// If we're outside the first shadow tile, force no shadows.
	float insideFirstTile = (uvzw.y > 0.25) ? 0 : 1;
	uvzw *= insideFirstTile;

	// Shadow acne fix
	uvzw.z *= common.shadowFactors.w;

	// Grid specific values first, then the ones common to the light
	outSoftShadowParams.blockerSearchRadius	= grid.softShadowParams.x;
	outSoftShadowParams.spreadRatio			= grid.softShadowParams.y;
	outSoftShadowParams.noiseScale			= grid.softShadowParams.z;
	outSoftShadowParams.sampleCount			= common.params.x;
	outSoftShadowParams.halfSampleCount		= common.params.y;
	outSoftShadowParams.noiseIntensity		= common.params.z;

	return uvzw;
}

//------------------------------------------------------------------------------------------------
// Implementation for point & spot light shadow maps.
//
// Soft shadows are not supported by this technique, so neutral soft shadow settings are written.
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives neutral soft shadow settings (0 / 0 / 1 / 1 / 0 / 0)
// Returns xyz after the perspective divide (z additionally scaled by the bias) and the
// undivided w, which CalculateLightAttenuation() reads.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapSpotPoint(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	float4 projected = mul(viewSpaceCoords, GetSpotPoint().eye2shadowMapUVMtx);

	// Perspective divide; w itself is left untouched
	projected.xyz /= projected.w;

	// Shadow acne fix
	projected.z *= GetCommon().shadowFactors.w;

	// No soft shadows for this technique
	outSoftShadowParams.sampleCount			= 1;
	outSoftShadowParams.halfSampleCount		= 1;
	outSoftShadowParams.blockerSearchRadius	= 0;
	outSoftShadowParams.spreadRatio			= 0;
	outSoftShadowParams.noiseScale			= 0;
	outSoftShadowParams.noiseIntensity		= 0;

	return projected;
}

//------------------------------------------------------------------------------------------------
// Implementation for dual paraboloid shadow maps.
//
// The fragment is transformed into light space, mirrored into the front half-space if needed,
// and projected onto the paraboloid. The two paraboloids are stored in consecutive quarters of
// the V range (front at V offset 0, back at V offset 0.25).
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives neutral soft shadow settings (0 / 0 / 1 / 1 / 0 / 0)
// Returns .xy = shadow map UVs, .z = distance from the light mapped with params.z / params.w,
// .w = transformed w divided by the distance from the light.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapDualParaboloid(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	// Ref : http://www.mpi-inf.mpg.de/~tannen/papers/cgi_02.pdf
	// Ref : http://www.gamedev.net/reference/articles/article2308.asp

	float4 outShadowMapCoords; // intentionally not initialized

	outShadowMapCoords = mul(viewSpaceCoords, GetDualParaboloid().viewMtx);

	// -1 when the fragment is behind the first paraboloid, +1 otherwise.
	// This deliberately isn't sign(): sign(0) is 0, which would zero out x for fragments lying
	// exactly on the plane between the two paraboloids. Those are treated as being in front.
	float negativeHalfSpaceFactor = (outShadowMapCoords.z < 0) ? -1.0 : 1.0;

	// Mirror fragments from the back half-space into the front one
	outShadowMapCoords.xz *= negativeHalfSpaceFactor;

	// Paraboloid projection: normalize, then divide xy by (z + 1)
	float finalZ = length(outShadowMapCoords.xyz);
	outShadowMapCoords /= finalZ;
	outShadowMapCoords.xy /= (outShadowMapCoords.z + 1.0);

	// Convert to UV coords
	outShadowMapCoords.xy = outShadowMapCoords.xy * float2(0.5, -0.125) + float2(0.5, 0.125);

	// Shift UV based on whether we're in front or behind the first paraboloid half-space
	outShadowMapCoords.y += (negativeHalfSpaceFactor<0) ? 0.25 : 0;

	// Store distance from light as Z
	outShadowMapCoords.z = (finalZ - GetDualParaboloid().params.z) * GetDualParaboloid().params.w;

	// No soft shadows for this technique
	outSoftShadowParams.blockerSearchRadius	= 0;
	outSoftShadowParams.spreadRatio			= 0;
	outSoftShadowParams.sampleCount			= 1;
	outSoftShadowParams.halfSampleCount		= 1;
	outSoftShadowParams.noiseScale			= 0;
	outSoftShadowParams.noiseIntensity		= 0;

	return outShadowMapCoords;
}

//------------------------------------------------------------------------------------------------
// Implementation for cube shadow maps.
//
// Only transforms the position with the cube's matrix. The face selection, depth comparison and
// shadow acne fix are all done in LookupShadowmapCube(), which samples a cube texture with the
// direction returned here. (An earlier variant that selected a face of a 2D atlas in this
// function has been removed.)
//
// viewSpaceCoords		Fragment position in eye (view) space
// outSoftShadowParams	Receives neutral soft shadow settings (0 / 0 / 1 / 1 / 0 / 0)
// Returns the position transformed by GetCube().viewMtx.
//------------------------------------------------------------------------------------------------
float4 ProjectToShadowMapCube(float4 viewSpaceCoords, out SoftShadowParams outSoftShadowParams)
{
	float4 lightSpacePos = mul(viewSpaceCoords, GetCube().viewMtx);

	// No soft shadows for this technique
	outSoftShadowParams.sampleCount			= 1;
	outSoftShadowParams.halfSampleCount		= 1;
	outSoftShadowParams.blockerSearchRadius	= 0;
	outSoftShadowParams.spreadRatio			= 0;
	outSoftShadowParams.noiseScale			= 0;
	outSoftShadowParams.noiseIntensity		= 0;

	return lightSpacePos;
}


//------------------------------------------------------------------------------------------------
// Fwd declarations
//------------------------------------------------------------------------------------------------
// Declared up front because LookupShadowmap() calls it before its body is defined.
// Note that it returns a float4 although the result is a single shadow intensity value.
float4 LookupShadowmapCube(float4 lightSpaceCoords, float4 fragPosViewSpace, SoftShadowParams softShadowParams);

//------------------------------------------------------------------------------------------------
// Given a set of lightspace coords, it looks up the shadow map and returns the shadow intensity.
// lightSpaceCoords should be the result of ProjectToShadowMap()
// lightSpaceCoords.xy	The UV coords to use for shadowmap lookup
// lightSpaceCoords.z	The z value to compare against the shadow map Z
// fragPosViewSpace		Fragment position in view space (only forwarded to the cube map lookup)
// softShadowParams		Soft shadow settings; only read by the _MANUALPCF + SOFT_SHADOWS path
//
// In the manual compare paths the result is 1 where the fragment z is greater than the stored
// depth (shadowed) and 0 otherwise, filtered over the samples taken.
//
// The filtering method is selected at compile time:
//   CUBE							single cube map sample (LookupShadowmapCube)
//   _360_TARGET					4 point samples + bilinear weights
//   CG_PS3							tex2Dproj
//   _WINPC + _HARDWAREPCF			tex2Dproj (result inverted)
//   _WINPC + _FETCH4PCF			one fetch returning 4 depths + bilinear weights
//   _WINPC + _MANUALPCF			PCSS when SOFT_SHADOWS is defined, else 2x2 bilinear PCF
// NOTE(review): the CG_PS3 path returns the tex2Dproj result as is while the _HARDWAREPCF path
// returns 1 - result; presumably the compare function is set up differently on PS3 - confirm.
// NOTE(review): there is no return statement when none of the platform / PCF mode defines is set.
//------------------------------------------------------------------------------------------------
float LookupShadowmap(float4 lightSpaceCoords, float4 fragPosViewSpace, SoftShadowParams softShadowParams)
{
#if defined(CUBE)
	// LookupShadowmapCube() returns a float4 with the intensity replicated; take one component
	// explicitly instead of relying on an implicit vector truncation.
	return LookupShadowmapCube(lightSpaceCoords, fragPosViewSpace, softShadowParams).x;
#endif

	float4 coords = lightSpaceCoords;

	// Some shadowmaps are tightly focused around the shadow _casters_. This means that a shadow receiver
	// that is further away than the last shadow caster would get a Z of >1 and therefore always be in shadow
	// (since the maximum/clear value of the shadow map is 1.0). To get around this we set the Z range of the
	// shadow map to be a little further than the furthest shadow caster. This means that a value of 1.0 in the
	// shadow map means no occluder, and a value of <1.0 means there's an occluder.
	// By clamping the fragment's z to 0.99 we're ensuring that it's within the Z range of the shadowmap (i.e. <1)
	// but it will still get correct shadow from the occluders (which have Z values of < 0.99).
	float unclampedFragmentZ = coords.z;
	coords.z = min(coords.z, 0.99);

// PCF is always enabled; the unfiltered single sample path below is kept for reference.
#define PCF

#if !defined(PCF)
	
	float shadowMapZ = readShadowMap(g_ShadowMapTex, coords.xy);

	return (coords.z > shadowMapZ) ? 1.0 : 0.0;

#else // PCF

	#if defined(_360_TARGET)

		float4 weights;
		float4 shadowMapZ;

		// Fetch the 4 texels surrounding the sample position and the bilinear fractions
		asm {
			tfetch2D shadowMapZ.x___, coords.xy, g_ShadowMapTex, OffsetX = -0.5, OffsetY = -0.5
			tfetch2D shadowMapZ._x__, coords.xy, g_ShadowMapTex, OffsetX =  0.5, OffsetY = -0.5
			tfetch2D shadowMapZ.__x_, coords.xy, g_ShadowMapTex, OffsetX = -0.5, OffsetY =  0.5
			tfetch2D shadowMapZ.___x, coords.xy, g_ShadowMapTex, OffsetX =  0.5, OffsetY =  0.5

			getWeights2D weights, coords.xy, g_ShadowMapTex, MagFilter=linear, MinFilter=linear
		};

		// Turn the two fractions into one weight per fetched texel
		weights = float4(	(1-weights.x)	*	(1-weights.y), 
							weights.x		*	(1-weights.y),
							(1-weights.x)	*	weights.y,
							weights.x		*	weights.y );

		float4 depthCompare = coords.z > shadowMapZ;

		return dot(depthCompare, weights);

	#elif defined(CG_PS3)

		return tex2Dproj(g_ShadowMapTex, float4(coords.xyz,1));

	#elif defined(_WINPC)

		// Supported combinations
		// 0.Fallback					: FromColor,	FromColor,		Manual PCF
		// 1.nVidia 6xxx, 7xxx			: ARG,			ARG,			Hardware PCF
		// 2.nVidia 8xxx, ATI 4xxx+		: Float,		Float,			Hardware PCF
		// 3.ATI 1xxx+					: FromColor,	Float,			Fetch4 PCF
		// 4.ATI 2xxx+					: FromColor,	Float,			Hardware PCF

		#if defined(_HARDWAREPCF)

			return 1 - tex2Dproj(g_ShadowMapTex, float4(coords.xyz,1));

		#elif defined(_FETCH4PCF)

			// A single fetch returns the 4 neighbouring depths in .rgba
			float4 shadowMapZ = tex2D(g_ShadowMapTex, coords.xy);

			float4 depthCompare = coords.z > shadowMapZ;

			// Bilinear fractions; V is scaled by 4 relative to U
			float4 weights = frac(coords.xyxy * g_ShadowMapSize * float4(1,4,1,4));

			weights = float4(	   weights.x	*	(1-weights.y),		// R == Top Right
								(1-weights.x)	*	   weights.y,		// G == Bottom Left
								   weights.x	*	   weights.y,		// B == Bottom Right
								(1-weights.x)	*	(1-weights.y) );	// A == Top Left

			return dot(depthCompare, weights);

		#elif defined(_MANUALPCF)

			#if defined(SOFT_SHADOWS)

				// PCSS Regular grid

				// The kernel covers [-kernelHalfSize, +kernelHalfSize] in both axes. It is clamped to
				// at least 1 because the step sizes below divide by it: a half sample count of 0
				// would otherwise produce infinite steps and NaN sample coordinates.
				int kernelHalfSize = max((int)softShadowParams.halfSampleCount, 1);

				// The blocker search radius grows with the fragment's (unclamped) shadow space z
				float2 blockerRadius = (1.0 / g_ShadowMapSize) * unclampedFragmentZ * softShadowParams.spreadRatio;

				// V offsets are a quarter of the U offsets
				blockerRadius.y *= 0.25;

				float2 blockerRadialStep = blockerRadius / (float)kernelHalfSize;

				// ----------------------
				// Blocker search
		
				float averageOccluderZ = 0;
				float occluderCount = 0;

				{
					for (int i=-kernelHalfSize; i<(kernelHalfSize+1); i++)
					{
						for (int j=-kernelHalfSize; j<(kernelHalfSize+1); j++)
						{
							float occluderDepth = readShadowMap(g_ShadowMapTex, coords.xy + float2(i, j) * blockerRadialStep);

							if (coords.z > occluderDepth)
							{
								averageOccluderZ += occluderDepth;
								occluderCount += 1.0;
							}
						}
					}
				}

				// No occluders, no shadow
				if (occluderCount == 0)
					return 0;

				averageOccluderZ /= occluderCount;

				// ----------------------
				// PCF

				// The filter radius grows with the distance between the fragment and the average occluder
				float2 radius = (1.0 / g_ShadowMapSize) * (unclampedFragmentZ-averageOccluderZ) * softShadowParams.spreadRatio;
				float2 radialStep = radius / (float)kernelHalfSize;

				float shadowIntensity = 0;
				float count = 0;

				float4 noise = tex2Dlod(g_NoiseTex, float4(coords.xy * float2(1,4) * softShadowParams.noiseScale, 0, 0));

				noise.xy = noise.xy * 2 - 1;
				
				// Calculate sin/cos for noise (so that we can apply intensity)
				// The texture .xy already contains cos/sin
				// After this call noise.y = sin and noise.x = cos of the (scaled) rotation angle
				sincos(noise.b * 3.14159 * 0.25 * softShadowParams.noiseIntensity, noise.y, noise.x);

				static const bool useRegularGrid = true;
				
				if (useRegularGrid)
				{
					for (int i=-kernelHalfSize; i<(kernelHalfSize+1); i++)
					{
						for (int j=-kernelHalfSize; j<(kernelHalfSize+1); j++)
						{
							float2 texCoord1 = float2(i, j) * radialStep;
							float2 texCoord;
							
							// Rotate the grid offset by the noise angle
							texCoord.x = texCoord1.x * noise.x - texCoord1.y * noise.y;
							texCoord.y = texCoord1.x * noise.y + texCoord1.y * noise.x;

							// V offsets are a quarter of the U offsets
							texCoord.y *= 0.25;

							shadowIntensity += (coords.z > readShadowMap(g_ShadowMapTex, coords.xy + texCoord));
							count += 1.0;
						}
					}
				}
				else
				{
					// !!!!WARNING!!!!
					// The following code is for experimentation only. It will produce REALLY
					// horrible assembly. For better performance we need to use an unrolled loop (i.e. fixed sample count)
					//for (int i=0; i<softShadowParams.sampleCount; ++i)

					[unroll]
					for (int i=0; i<35; ++i)
					{
						float2 texCoord1 = g_PoissonDisk[i] * radius;
						float2 texCoord;
						
						// Rotate the sample offset by the noise angle
						texCoord.x = texCoord1.x * noise.x - texCoord1.y * noise.y;
						texCoord.y = texCoord1.x * noise.y + texCoord1.y * noise.x;

						// V offsets are a quarter of the U offsets
						texCoord.y *= 0.25;

						shadowIntensity += (coords.z > readShadowMap(g_ShadowMapTex, coords.xy + texCoord));
						count += 1.0;
					}
				}

				shadowIntensity /= count;

				return shadowIntensity;

			#else // SOFT_SHADOWS

				// 2x2tap bilinear PCF fall back
				float2 offset = (1.0 / g_ShadowMapSize);
				offset.y *= 0.25;

				float4 shadowMapZ; 
				shadowMapZ.x = readShadowMap(g_ShadowMapTex, coords.xy + float2(+1,  0) * offset);	// Top Right
				shadowMapZ.y = readShadowMap(g_ShadowMapTex, coords.xy + float2( 0, +1) * offset);	// Bottom Left
				shadowMapZ.z = readShadowMap(g_ShadowMapTex, coords.xy + float2(+1, +1) * offset);	// Bottom Right
				shadowMapZ.w = readShadowMap(g_ShadowMapTex, coords.xy + float2( 0,  0) * offset);	// Top Left

				float4 depthCompare = coords.z > shadowMapZ;

				// Bilinear fractions; V is scaled by 4 relative to U
				float4 weights = frac(coords.xyxy * g_ShadowMapSize * float4(1,4,1,4));

				weights = float4(	   weights.x	*	(1-weights.y),		// R == Top Right
									(1-weights.x)	*	   weights.y,		// G == Bottom Left
									   weights.x	*	   weights.y,		// B == Bottom Right
									(1-weights.x)	*	(1-weights.y) );	// A == Top Left

				return dot(depthCompare, weights);

			#endif // SOFT_SHADOWS

		#endif

	#endif // platform

#endif // PCF
}

//------------------------------------------------------------------------------------------------
// Shadow map lookup for cube map shadow maps.
//
// lightSpaceCoords		Result of ProjectToShadowMapCube(); .xyz is used as the lookup direction
// fragPosViewSpace		Not used
// softShadowParams		Not used
// Returns 1 when the fragment is further from the light than the stored occluder (shadowed),
// 0 otherwise. The scalar result is replicated to all components of the float4.
//------------------------------------------------------------------------------------------------
float4 LookupShadowmapCube(float4 lightSpaceCoords, float4 fragPosViewSpace, SoftShadowParams softShadowParams)
{
	float shadowMapZ = readShadowMap(g_ShadowMapCubeTex, normalize(lightSpaceCoords.xyz));

	// Linearize Z
	shadowMapZ = GetCube().projParams.x / (1.0 - shadowMapZ * GetCube().projParams.y);

	// And you thought sampling a shadow cubemap would be easy...!
	// Each cubemap face contains the distance of the shadow caster from the light *along the direction of that face*.
	// It doesn't contain the Euclidean distance of the occluder from the light. So we can't compare against that.
	// So, we need to find which cube face the current fragment belongs to, and work out the distance along that
	// face direction.
	// The alternative is to write a special vertex shader for the occluders that outputs z = Euclidean distance.
	//
	// The face a fragment belongs to is the one along its dominant axis, so the distance along the
	// face direction is simply the largest absolute component. Taking the max directly (instead of
	// building a face mask with strict comparisons) also gives the right distance when two
	// components tie, i.e. on the edges between faces, where a strict mask would be all zero.
	float3 absCoords = abs(lightSpaceCoords.xyz);
	float fragmentZ = max(absCoords.x, max(absCoords.y, absCoords.z));

	// Shadow acne fix
	fragmentZ *= GetCommon().shadowFactors.w;

	return (fragmentZ > shadowMapZ) ? 1.0 : 0.0;
}

//------------------------------------------------------------------------------------------------
// Calculates the light/shadow attenuation factor for a given view space position.
//
// viewSpaceCoords		Fragment position in view space
// lightSpaceCoords		Result of ProjectToShadowMap() for the fragment
// Returns a factor to multiply the shadow intensity with. It is always 1 unless SPOT_LIGHT,
// POINT_LIGHT or DUAL_PARABOLOID is defined.
//------------------------------------------------------------------------------------------------
half CalculateLightAttenuation(float4 viewSpaceCoords, float4 lightSpaceCoords)
{
	half attenuation = 1.0;

	// Point/Spot light attenuation
	#if defined(SPOT_LIGHT) || defined(POINT_LIGHT) || defined(DUAL_PARABOLOID) /*|| defined(CUBE)*/

		// Transform .xy from UV to clip space
		// Note : For point/spot lights V is in [0..1/4] not [0..1]
		half2 clipXY = lightSpaceCoords.xy * half2(2,-2*4) + half2(-1,1);

		// Pick the constants of the active light type
		#if defined(SPOT_LIGHT) || defined(POINT_LIGHT)

			float4 params = GetSpotPoint().params;
		
		#elif defined(DUAL_PARABOLOID)

			float4 params = GetDualParaboloid().params;

		#elif defined(CUBE)

			float4 params = GetCube().params;

		#endif

		#if defined(POINT_LIGHT) || defined(DUAL_PARABOLOID) || defined(CUBE)

			// Anything closer than the near plane doesn't shadow. We don't do the same for the far plane
			// as it might only extend as far as the shadow caster (not the actual shadow). The attenuation
			// calc below ensures we don't have shadows beyond the light's range anyway.
			attenuation *= (lightSpaceCoords.w > params.z);

			// Anything outside the side frustum planes doesn't shadow
			attenuation *= ((clipXY.x * clipXY.x) < 1.0);
			attenuation *= ((clipXY.y * clipXY.y) < 1.0);

			// Attenuate by distance from light
			half3 toLight = GetCommon().lightDirPos.xyz - viewSpaceCoords.xyz;
			half distanceToLight = length(toLight);
			attenuation *= 1.0 - saturate((distanceToLight - params.x) * params.y);

		#else

			// Only reachable with SPOT_LIGHT, so 'params' holds the spot light constants here

			// Attenuate along spot light direction
			attenuation *= 1.0 - saturate((lightSpaceCoords.w - params.x) * params.y);

			#if !defined(SPOT_TID)
			// Attenuate radially
			attenuation *= 1.0 - saturate(dot(clipXY, clipXY));
			#else
			// Attenuate with the alpha of the spot texture instead
			// NOTE(review): the lookup treats lightSpaceCoords.xy as clip space coordinates although the
			// code above treats the same value as UVs - confirm whether clipXY was intended here.
			float4 spotTex = tex2D(g_SpotTex, lightSpaceCoords.xy * float2(0.5, -0.5) + 0.5);
			attenuation *= spotTex.a;
			#endif

		#endif

	#endif

	return attenuation;
}

//------------------------------------------------------------------------------------------------
// Calculates attenuation factor along view direction
//------------------------------------------------------------------------------------------------
// Returns the shadow fade factor for a fragment based on its view space Z
// (distance from the camera). Only viewSpaceCoords.z is read.
half CalculateViewSpaceAttenuation(float4 viewSpaceCoords)
{
	// .y = view space falloff start, .z = 1.0 / (falloff end - start)
	float4 shadowFactors = GetCommon().shadowFactors;

	// 1 up to the falloff start, dropping linearly to 0 at the falloff end
	half viewFade = 1.0 - saturate((viewSpaceCoords.z - shadowFactors.y) * shadowFactors.z);

	return viewFade;
}

//------------------------------------------------------------------------------------------------
// Main deferred shadows pixel shader.
// Calculates the shadow contribution of a single light. The output of this shader goes to the 
// luminance buffer.
//------------------------------------------------------------------------------------------------
#if !defined(UBERSHADER)

// Pixel shader producing the shadow intensity of a single light for one fragment.
// The intensity is replicated into RGB; alpha is always 1.
float4 ShadowOnlyPS(VS_OUT interpolants) : COLOR
{
	float4 screenCoords = interpolants.coords;

#if defined(SPOT_LIGHT) || defined(POINT_LIGHT) || defined(DUAL_PARABOLOID) || defined(CUBE) || defined(GRID)
	// interpolants.projectionParams.y carries the w component of the fragment position;
	// divide it out of the interpolated coordinates
	screenCoords /= interpolants.projectionParams.y;
#endif

	// Reconstruct the fragment position in view space
	half4 viewPos = BackProject(screenCoords.xy, screenCoords.zw, interpolants.projectionParams);

	// Project it into light space (also fills in the soft shadow parameters)
	SoftShadowParams softShadowParams;
	float4 lightPos = ProjectToShadowMap(viewPos, softShadowParams);

	// Test the fragment against the shadow map
	half shadow = LookupShadowmap(lightPos, viewPos, softShadowParams);

	// Debug visualisations (shadow map contents / light space depth):
	//return float4(tex2D(g_ShadowMapTex, lightPos.xy).rgb,1);
	//return float4(lightPos.z, lightPos.z, lightPos.z, 1.0);

	// Scale by the overall shadow intensity factor
	shadow *= GetCommon().shadowFactors.x;

	// Fade by light falloff, then by distance from the camera
	shadow *= CalculateLightAttenuation(viewPos, lightPos);
	shadow *= CalculateViewSpaceAttenuation(viewPos);

	// The red channel of the stencil values texture masks out pixels that must not be shadowed
	shadow *= 1.0f - tex2D(g_StencilValuesTex, screenCoords.xy).r;

	return float4(shadow.xxx, 1.0);
}

//------------------------------------------------------------------------------------------------
// Applies the luminance map back onto the scene.
//------------------------------------------------------------------------------------------------
// Outputs black RGB with the red channel of the luminance map as alpha.
float4 ApplyLuminanceMapPS(VS_OUT interpolants) : COLOR
{
	// Debug: show the luminance map directly
	//return float4(tex2D(g_LuminanceTex, interpolants.coords.xy).rgb, 0.5);

	float luminance = tex2D(g_LuminanceTex, interpolants.coords.xy).r;

	return float4(0, 0, 0, luminance);
}

//------------------------------------------------------------------------------------------------
// Debug shader
//------------------------------------------------------------------------------------------------

// Color returned by DebugShadowPS in the GRID variant (its alpha is scaled per texel there).
// Bound to constant register c200.
float4 gridDebugColor : register(c200);

// Debug pixel shader that color-codes shadow map coverage.
//   PARALLEL : tints each fragment by the frustum split(s) whose Z range contains it
//   CASCADE  : tints by cascade radius range, with a per-texel checker pattern in alpha
//   GRID     : outputs gridDebugColor with a per-texel checker pattern in alpha
//   otherwise: outputs 0
float4 DebugShadowPS(VS_OUT interpolants) : COLOR
{
	float4 screenCoords = interpolants.coords;

#if defined(SPOT_LIGHT) || defined(POINT_LIGHT) || defined(DUAL_PARABOLOID) || defined(CUBE) || defined(GRID)
	// interpolants.projectionParams.y carries the w component of the fragment position;
	// divide it out of the interpolated coordinates
	screenCoords /= interpolants.projectionParams.y;
#endif

	// Reconstruct the fragment position in view space
	half4 viewPos = BackProject(screenCoords.xy, screenCoords.zw, interpolants.projectionParams);

#if defined(PARALLEL)

	// One mask component per split: 1 where view space Z lies strictly between the split's min and max
	float4 aboveMin = viewPos.zzzz > GetParallel().ZRangeMin;
	float4 belowMax = viewPos.zzzz < GetParallel().ZRangeMax;
	float4 splitMask = aboveMin * belowMax;

	// Split 0 = red, 1 = blue, 2 = green, 3 = yellow
	float3 tint =	float3(1,0,0) * splitMask.x +
					float3(0,0,1) * splitMask.y +
					float3(0,1,0) * splitMask.z +
					float3(1,1,0) * splitMask.w;

	// Fragments outside every split stay fully transparent
	return float4(tint, dot(splitMask,1) > 0 ? 0.5 : 0);

#elif defined(CASCADE)

	float3 lightProj = mul(viewPos, GetCascade().eye2lightProjMtx);

	// The larger of the squared x / y extents selects the cascade
	float2 lightProjSq = lightProj.xy * lightProj.xy;
	float radiusSq = max(lightProjSq.x, lightProjSq.y);

	float4 outsideMin = radiusSq.xxxx > GetCascade().MinRadiusSq;
	float4 insideMax = radiusSq.xxxx < GetCascade().MaxRadiusSq;
	float4 cascadeMask = outsideMin * insideMax;

	// Integer texel coordinates in the shadow map (V is scaled by 4x the map size)
	SoftShadowParams unusedParams;
	float3 texel = ProjectToShadowMapCascade(viewPos, unusedParams);
	texel.x = floor(texel.x * g_ShadowMapSize);
	texel.y = floor(texel.y * (g_ShadowMapSize * 4));

	// Checker pattern: texels whose parity is zero get the lower alpha
	float parity = fmod(texel.x + fmod(texel.y,2), 2);
	float alpha = (parity == 0) ? 0.4 : 0.5;

	// Cascade 0 = red, 1 = blue, 2 = green, 3 = yellow
	float3 tint =	float3(1,0,0) * cascadeMask.x +
					float3(0,0,1) * cascadeMask.y +
					float3(0,1,0) * cascadeMask.z +
					float3(1,1,0) * cascadeMask.w;

	// Fragments outside every cascade stay fully transparent
	return float4(tint, dot(cascadeMask,1) > 0 ? alpha : 0);

#elif defined(GRID)

	// Integer texel coordinates in the shadow map (V is scaled by 4x the map size)
	SoftShadowParams unusedParams;
	float3 texel = ProjectToShadowMapGrid(viewPos, unusedParams);
	texel.x = floor(texel.x * g_ShadowMapSize);
	texel.y = floor(texel.y * (g_ShadowMapSize * 4));

	// Checker pattern: texels whose parity is zero get the lower alpha
	float parity = fmod(texel.x + fmod(texel.y,2), 2);
	float alpha = (parity == 0) ? 0.8 : 1.0;

	return gridDebugColor * float4(1,1,1,alpha);

#else

	// Spot, point, dual paraboloid and cube lights (and any other configuration)
	// have no debug visualisation
	return 0;

#endif
}

//------------------------------------------------------------------------------------------------
// Debug pixel shader to convert shadowmap into R32F texture
//------------------------------------------------------------------------------------------------
// Reads the shadow map at the interpolated UV and replicates the value into all four channels.
float4 ShadowmapToR32F(VS_OUT interpolants) : COLOR0
{
	float depth = readShadowMap(g_ShadowMapTex, interpolants.coords.xy);

	return depth.xxxx;
}

//------------------------------------------------------------------------------------------------
// Debug vertex shader used to visualize the contents of the shadow map
//------------------------------------------------------------------------------------------------

// Constants and sampler read by DebugVisualizeShadowmapVS.

// Shadow map sampled in the vertex shader through tex2Dlod (its .r channel is used)
sampler2D 	g_ShadowMapTexVS		: register(s0);	// floating point texture
// Applied to the reconstructed light space position before it is multiplied by viewProj.
// NOTE(review): c200 is also assigned to gridDebugColor; presumably harmless because that
// constant is only read by a pixel shader - confirm.
rm float4x4 g_InverseShadowViewProj : register(c200);
// .xy = scale, .zw = bias applied to the incoming texCoord to get the shadow map UV
float4		g_ShadowMapUVScaleBias	: register(c204);
// .xy = scale, .zw = bias applied to the incoming position.xy to get the light space XY
float4		g_ShadowMapPosScaleBias	: register(c205);

// Displaces each input vertex to the position stored in the shadow map so the map's contents
// can be drawn as geometry.
//   position : .xy is scaled/biased by g_ShadowMapPosScaleBias into light space XY
//   texCoord : scaled/biased by g_ShadowMapUVScaleBias into the shadow map UV to sample
// Returns the vertex position after transformation by viewProj.
float4 DebugVisualizeShadowmapVS(	float4 position : POSITION,
									float2 texCoord : TEXCOORD0) : POSITION
{
	// Fetch the depth stored for this vertex (explicit LOD 0)
	float2 shadowUV = texCoord * g_ShadowMapUVScaleBias.xy + g_ShadowMapUVScaleBias.zw;
	float depth = tex2Dlod(g_ShadowMapTexVS, float4(shadowUV, 0, 0)).r;

	// Build the light space position from the remapped XY and the sampled depth
	float2 lightXY = position.xy * g_ShadowMapPosScaleBias.xy + g_ShadowMapPosScaleBias.zw;
	float4 lightSpacePos = float4(lightXY, depth, 1);

	// Undo the shadow view-projection, then apply the camera's view-projection
	float4 worldSpacePos = mul(lightSpacePos, g_InverseShadowViewProj);

	return mul(worldSpacePos, viewProj);
}

#endif // UBERSHADER



// Scratchpad

/*
			////////////////////////////////////////////////////////////////////////////////////////////////////////
			// PCSS Poisson
			{
				//float2 blockerRadius = (1.0 / g_ShadowMapSize) * GetCommon().params.x;
				//blockerRadius.y *= 0.25;

				//// ----------------------
				//// Blocker search
		
				//float averageOccluderZ = 0;
				//float occluderCount = 0;

				//for (int i=0; i<16; ++i)
				//{
				//	float occluderDepth = readShadowMap(g_ShadowMapTex, coords.xy + g_PoissonDisk[i] * blockerRadius);

				//	if (coords.z > occluderDepth)
				//	{
				//		averageOccluderZ += occluderDepth;
				//		occluderCount += 1.0;
				//	}
				//}

				//// No occluders, no shadow
				//if (occluderCount == 0)
				//	return 0;

				//averageOccluderZ /= occluderCount;

				//// ----------------------
				//// PCF

				//float2 radius = (1.0 / g_ShadowMapSize) * (unclampedFragmentZ-averageOccluderZ) / averageOccluderZ * GetCommon().params.y;
				//radius.y *= 0.25;

				//float shadowIntensity = 0;

				//for (int j=0; j<16; ++j)
				//{
				//	shadowIntensity += (coords.z > readShadowMap(g_ShadowMapTex, coords.xy + g_PoissonDisk[j] * radius));
				//}

				//shadowIntensity /= 16.0;

				////return smoothstep(0, 1, shadowIntensity);
				//return shadowIntensity;
			}
			////////////////////////////////////////////////////////////////////////////////////////////////////////

			////////////////////////////////////////////////////////////////////////////////////////////////////////
			// Poisson PCF
			{
				//float2 radius = (1.0 / g_ShadowMapSize) * GetCommon().params.x;
				//radius.y *= 0.25;

				//float shadowIntensity = 0;

				//for (int i=0; i<16; ++i)
				//{
				//	shadowIntensity += (coords.z > readShadowMap(g_ShadowMapTex, coords.xy + g_PoissonDisk[i] * radius));
				//}

				//shadowIntensity /= 16.0;

				////return smoothstep(0, 1, shadowIntensity);
				//return shadowIntensity;
			}
			////////////////////////////////////////////////////////////////////////////////////////////////////////

			////////////////////////////////////////////////////////////////////////////////////////////////////////
			// NxN tap, filtered
			{
				//float2 offset = (1.0 / g_ShadowMapSize) * GetCommon().params.x;
				//offset.y *= 0.25;

				//float4 fractions = frac(coords.xyxy * g_ShadowMapSize * float4(1,4,1,4));

				//float shadowIntensity = 0;

				//static const int kernelHalfSize = 3;
				////[unroll]
				//for (int i=-kernelHalfSize; i<(kernelHalfSize+2); ++i)
				//{
				//	//[unroll]
				//	for (int j=-kernelHalfSize; j<(kernelHalfSize+2); ++j)
				//	{
				//		float weight = 1;

				//		// Left edge
				//		if (i==-kernelHalfSize)
				//			weight *= (1-fractions.x);

				//		// Right edge
				//		if (i==(kernelHalfSize+1))
				//			weight *= fractions.x;

				//		// Top edge
				//		if (j==-kernelHalfSize)
				//			weight *= (1-fractions.y);

				//		// Bottom edge
				//		if (j==(kernelHalfSize+1))
				//			weight *= fractions.y;

				//		shadowIntensity += weight * (coords.z > readShadowMap(g_ShadowMapTex, coords.xy + float2(i, j) * offset));
				//	}
				//}

				//shadowIntensity /= ( 2.0 * float(kernelHalfSize) + 1.0 ) * ( 2.0 * float(kernelHalfSize) + 1.0 );

				//return smoothstep(0, 1, shadowIntensity);
			}
			////////////////////////////////////////////////////////////////////////////////////////////////////////

			////////////////////////////////////////////////////////////////////////////////////////////////////////
			// NxN tap, point sample
			{
				//float2 offset = (1.0 / g_ShadowMapSize) * GetCommon().params.x;
				//offset.y *= 0.25;

				//float shadowIntensity = 0;

				//static const int kernelHalfSize = 6;
				//float count = 0;
				//for (int i=-kernelHalfSize; i<(kernelHalfSize+1); ++i)
				//{
				//	for (int j=-kernelHalfSize; j<(kernelHalfSize+1); ++j)
				//	{
				//		shadowIntensity += (coords.z > readShadowMap(g_ShadowMapTex, coords.xy + float2(i, j) * offset));
				//		count += 1.0;
				//	}
				//}

				//shadowIntensity /= count;

				//return shadowIntensity;
			}
			////////////////////////////////////////////////////////////////////////////////////////////////////////

*/
